####################################
#Diff-in-diff with linear point as SD estimation (no matching) 
#Traffic and Political Attitudes
#Connor Jerzak & Brian Libgober
####################################

iv_estimation <- function(my_data, #data frame 
                          causal_var_name, 
                              treatment_basis_name, #character string 
                              control_basis_name, #character vector 
                              outcome_time1_name, #character string 
                              outcome_time2_name, #character string 
                              treatment_basis_treated_max, #numeric 
                              treatment_basis_control_min, #numeric 
                              control_basis_control_max, #numeric 
                              control_basis_treated_min, #numeric 
                              fixed_effect_names=NULL, 
                              conf_level=0.95, 
                              match=F, 
                              start_browser=F, 
                              type="binary",
                              my_method = "nearest", 
                              my_distance="mahalanobis", 
                              my_discard="none", 
                              my_caliper = my_caliper, 
                              my_m.order="largest", 
                              my_replace=T)
{#begin function brackets 
  require(MatchIt, quietly=T)
  
  if(start_browser==T){browser()}
  
  #internal cleaning
  my_data_original <- my_data
  my_data <- as.data.frame(
    cbind(apply(X=my_data[,which(!(colnames(my_data) %in% c("COUNTYFP10", "STATEFP10"))) ], 
                MARGIN=2, 
                function(x) as.numeric(as.character(x))),
          data.frame(my_data$COUNTYFP10, my_data$STATEFP10) ) )
  colnames(my_data)[ncol(my_data)-1] <- "COUNTYFP10"
  colnames(my_data)[ncol(my_data)] <- "STATEFP10"
  
  #get treatment basis into memory 
  treatment_basis <- eval(parse(text=paste("my_data$", treatment_basis_name, "")))
  
  #get control basis into memory 
  control_basis <- eval(parse(text=paste("my_data$", control_basis_name, "")))
  
  #determine treated_indicator 
  my_data <- cbind(my_data, NA) 
  colnames(my_data)[ncol(my_data)] <- "treated_indicator"
  my_data$treated_indicator[treatment_basis <= treatment_basis_treated_max & 
                              control_basis >= control_basis_treated_min] <- 1
  my_data$treated_indicator[control_basis <= control_basis_control_max & 
                              treatment_basis >= treatment_basis_control_min] <- 0
  
  my_data <- my_data[is.na(my_data$treated_indicator)==F,]
  matching_variables_2009 <- gsub(matching_variables, pattern = "_2000", replace = "_2009")
  matching_variables_2009 <- matching_variables_2009[matching_variables_2009 %in% colnames(my_data)]
  keep_columns <- colnames(my_data) %in% c(outcome_time1_name, outcome_time2_name, "treated_indicator", 
                                           treatment_basis_name, control_basis_name, fixed_effect_names, 
                                           matching_variables_2009, matching_variables, 
                                           causal_var_name)
  my_data <- my_data[,c(which(keep_columns), which(colnames(my_data) %in% matching_variables))]
  my_data_preMatch <- my_data
  matching_variables <- matching_variables[matching_variables %in% colnames(my_data) ]
  
  #prepare for the linear regression 
  if(match==T)
  { 
    #internally remove rows with NAs from the dataset. 
    my_data <- na.omit(my_data)
    
    matching_formula <- paste("treated_indicator ~", paste(matching_variables, collapse="+"))
    if(my_method=="genetic")
    {  
      my_match <- matchit(as.formula(matching_formula), data = my_data, 
                          method = my_method, 
                          distance= my_distance, 
                          discard= my_discard, 
                          #m.order=my_m.order, 
                          replace=my_replace)
    }
    
    if(my_method!="genetic")
    { 
      my_match <- try(matchit(as.formula(matching_formula), data = my_data, 
                          method = my_method, 
                          distance= my_distance, 
                          discard= my_discard, 
                          m.order=my_m.order, 
                          my_caliper = my_caliper, 
                          replace=my_replace), T)
    }  
    my_data <- try(match.data(my_match), T)
  }

  #prematch results 
  my_data_preMatch$OutcomeGain <- as.numeric(as.character( my_data_preMatch[,outcome_time2_name] )) - 
                                    as.numeric(as.character( my_data_preMatch[,outcome_time1_name] ))
  my_data_preMatch <- na.omit(my_data_preMatch )
  
  #post-match results 
  my_data$OutcomeGain <- try(as.numeric(as.character( my_data[,outcome_time2_name] )) - 
                              as.numeric(as.character( my_data[,outcome_time1_name] )), T)
  #IV analysis 
  my_data_use <- my_data
  binary_first_stage <- T
  if(binary_first_stage == T)
  { 
  lm_first_stage <-  lm(as.formula(
      paste(causal_var_name, "~treated_indicator", sep="" ) ) , 
      #paste(causal_var_name, paste(c("~treated_indicator", matching_variables), collapse="+"), sep="", collapse="")), 
      data=my_data_use)
  X_hat <- predict( lm_first_stage )
  } 
  
  if(binary_first_stage == F)
  { 
    lm_first_stage <-  lm(as.formula(
      paste(causal_var_name, paste(c(treatment_basis_name, control_basis_name), collapse="*"), sep="~") ) , 
      #paste(causal_var_name, paste(c(treatment_basis_name, control_basis_name, matching_variables), collapse="+"), sep="~", collapse="")), 
      data=my_data_use)
    X_hat <- predict( lm_first_stage )
  } 
  
  my_data_second_stage <- as.data.frame(  cbind(my_data_use, X_hat) ) 
  lm_second_stage <- lm( as.formula(
    paste("OutcomeGain~", paste(c("X_hat", fixed_effect_names, matching_variables), collapse="+"), sep="", collapse="") ), 
    data = my_data_second_stage)  

  lm_results <- list(round(summary(lm_first_stage)[[4]][2,], 10), 
                      round(summary(lm_second_stage)[[4]][2,], 10) )

  
  browser() 
  AER_return <- T 
  if(AER_return == T)
  { 
    library(AER)
    data("CigarettesSW", package = "AER")
    CigarettesSW$rprice <- with(CigarettesSW, price/cpi)
    CigarettesSW$rincome <- with(CigarettesSW, income/population/cpi)
    CigarettesSW$tdiff <- with(CigarettesSW, (taxs - tax)/cpi)
    
    ## model 
    fm <- ivreg(log(packs) ~ log(rprice) + log(rincome) | log(rincome) + tdiff + I(tax/cpi),
                data = CigarettesSW, subset = year == "1995")
    summary(fm)
    summary(fm, vcov = sandwich, df = Inf, diagnostics = TRUE)
    
    
    my_data[,matching_variables]

    
    control_variables 
    
    ivreg_formula <- as.formula(
      paste(
      #paste(sprintf("OutcomeGain~%s",paste('causal_var_name +',  matching_variables, collapse = "+"))),
        sprintf("OutcomeGain~%s",causal_var_name), 
      #paste(sprintf("|", causal_var_name), paste(matching_variables, '+treated_indicator', collapse = "+"), sep=""),
      paste(sprintf("|", causal_var_name), 'treated_indicator'),  
      collapse = "", sep = "")   ) 
    
    ivreg_list <- ivreg(formula = ivreg_formula, data = my_data)
    summary( ivreg_list )
    summary(ivreg_list, vcov = sandwich, df = Inf, diagnostics = TRUE)
  }
  
  
  #return
  return_list <- list(lm_summary= list(lm_results), 
                      first_stage = lm_first_stage, 
                      second_stage = lm_second_stage)
  return(return_list)
}#end function barkets 
